using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using System;
using System.Linq;

namespace Lombiq.HelpfulLibraries.Common.Utilities;

public static class HtmlHelper
{
    /// <summary>
    /// Wraps the provided <paramref name="htmlFragment"/> in a <c>&lt;div&gt;</c> element, parses the result with
    /// AngleSharp's <see cref="HtmlParser"/> and returns the one node the parser yields.
    /// </summary>
    /// <param name="htmlFragment">The HTML markup to parse. It is interpolated into the wrapper as-is.</param>
    /// <returns>The only node in the list returned by the parser.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Single()</c> when the parser does not return exactly one node.
    /// </exception>
    public static INode ParseHtmlFragment(string htmlFragment)
    {
        var parser = new HtmlParser();

        // No context element is supplied; the null-forgiving operator only silences the nullability warning.
        var parsedNodes = parser.ParseFragment($"<div>{htmlFragment}</div>", contextElement: null!);

        return parsedNodes.Single();
    }

    /// <summary>
    /// Extracts the plain text content of the provided <paramref name="htmlFragment"/>.
    /// </summary>
    /// <param name="htmlFragment">The HTML markup whose text should be extracted.</param>
    /// <returns>
    /// The text of the parsed fragment with leading and trailing whitespace trimmed, after a newline followed by one or
    /// more whitespace-only lines has been replaced with a single newline.
    /// </returns>
    public static string ConvertToPlainText(string htmlFragment)
    {
        var rawText = ParseHtmlFragment(htmlFragment).Text();

        // NOTE(review): RegexReplace is an extension defined outside this file; presumably a Regex.Replace wrapper.
        var collapsedText = rawText.RegexReplace(@"\n(\s*\n)+", "\n");

        return collapsedText.Trim();
    }
}
